## Guisinger and Singer 2010 IO

library(foreign)
library(Amelia)
library(hot.deck)
library(mice)
library(miceadds)

## Read the original replication data (Stata .dta file) and inspect it
gs2010 <- read.dta("GS2010 IO Rep Data.dta")
head(gs2010)
dim(gs2010)
summary(gs2010)

## Remove the ID variables
gs2010$wbname <- NULL
gs2010$wbcode <- NULL

## Remove the derived dummy variables, one column per statement
gs2010$t00 <- NULL
gs2010$t95 <- NULL
gs2010$t90 <- NULL
gs2010$t85 <- NULL
gs2010$t80 <- NULL
gs2010$t75 <- NULL
gs2010$t70 <- NULL

## Check how many variables remain. 46: no reduction necessary
dim(gs2010)

## Imputation
## What is average percentage of missing data?
## Count the missing values in each column of `x`.
##
## `x` must be two-dimensional (a data frame or a matrix). The result is an
## unnamed integer vector holding one NA count per column, in column order.
NAs <- function(x) {
    na_flags <- is.na(x)
    ## colSums() returns a named double vector; as.integer() restores the
    ## plain integer counts
    as.integer(colSums(na_flags))
}
## NA count per variable, then the mean share of missing values in percent
NAs(gs2010)
100 * mean(NAs(gs2010) / nrow(gs2010))

## Thus: 40 imputations

## Note: inf_ln already has lag
## (R parses the literal 02138 as the number 2138)
set.seed(02138)
gs2010.out <- amelia(
    gs2010,
    m = 40,
    ts = "year",
    cs = "cnum",
    polytime = 3,
    leads = c("both_fix_lag", "idejure_credl_djfix"),
    empri = 0.01 * nrow(gs2010)
)

## Write all imputations to a single Stata file.
## NOTE(review): this stem starts with "G2010", whereas every other file name
## in this script starts with "GS2010" -- confirm whether that is intended
## before renaming, since other code may read this file by name.
write.amelia(
    obj = gs2010.out,
    file.stem = "G2010 IO Imp Data",
    format = "dta",
    separate = FALSE
)

## m = 5: same Amelia specification and seed, with only five imputations
set.seed(02138)
gs2010.out.5 <- amelia(
    gs2010,
    m = 5,
    ts = "year",
    cs = "cnum",
    polytime = 3,
    leads = c("both_fix_lag", "idejure_credl_djfix"),
    empri = 0.01 * nrow(gs2010)
)

## Write all five imputations to a single Stata file
write.amelia(
    obj = gs2010.out.5,
    file.stem = "GS2010 IO Imp Data 5",
    format = "dta",
    separate = FALSE
)

## m = % incomplete observations
## Flag the rows without any missing value, then print the percentage of rows
## that have at least one
complete <- complete.cases(gs2010)
100 * (sum(!complete) / nrow(gs2010))
## Thus: 90 imputations
set.seed(02138)
gs2010.out.90 <- amelia(
    gs2010,
    m = 90,
    ts = "year",
    cs = "cnum",
    polytime = 3,
    leads = c("both_fix_lag", "idejure_credl_djfix"),
    empri = 0.01 * nrow(gs2010)
)

## Write all 90 imputations to a single Stata file
write.amelia(
    obj = gs2010.out.90,
    file.stem = "GS2010 IO Imp Data 90",
    format = "dta",
    separate = FALSE
)

## HD: multiple hot-deck imputation with 40 imputed data sets.
## hot.deck() and hd2amelia() come from the hot.deck package, which must be
## attached before this section runs.
## Reseed with the same seed as the Amelia runs so that the random draws here
## do not depend on the RNG state left behind by earlier sections.
set.seed(02138)
gs2010.out.hd <- hot.deck(gs2010, m = 40)
## Convert the hot.deck result into an Amelia-style object for write.amelia()
gs2010.out.hd <- hd2amelia(gs2010.out.hd)
write.amelia(
    obj = gs2010.out.hd,
    file.stem = "GS2010 IO Imp Data HD",
    format = "dta",
    separate = FALSE
)

## MICE: imputation by chained equations with 40 imputed data sets.
## mice() comes from the mice package and mids2datlist() from miceadds; both
## must be attached before this section runs.
## Reseed with the same seed as the Amelia runs so that the random draws here
## do not depend on the RNG state left behind by earlier sections.
set.seed(02138)
gs2010_mice <- mice(gs2010, m = 40)
## Pull the completed data sets out of the mids object as a list
datlist <- mids2datlist(gs2010_mice)
## Wrap the list in a minimal object of class "amelia" so that write.amelia()
## can be reused for the export
gs2010_amelia <- list("imputations" = datlist)
class(gs2010_amelia) <- "amelia"
write.amelia(
    obj = gs2010_amelia,
    file.stem = "GS2010 IO Imp Data MICE",
    format = "dta",
    separate = FALSE
)
